import scrapy
from ..items import MenuItem

class GetmenuSpider(scrapy.Spider):
    """Spider that scrapes recipe details linked from a meishij.net listing page.

    ``parse`` reads the listing page given in ``start_urls`` and schedules one
    request per recipe link; ``parse2`` turns each recipe page into a
    ``MenuItem`` whose fields are all plain strings.
    """

    name = "getmenus"
    allowed_domains = ["www.meishij.net"]
    start_urls = ["https://www.meishij.net/fenlei/chaofan/1"]  # listing page to crawl

    def parse(self, response):
        """Yield a request for every recipe detail link on the listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: One request per link, handled by ``parse2``.
        """
        detail_links = response.xpath(
            '//div[@class="imgw"]/a[@class="list_s2_item_img"]/@href').extract()
        for link in detail_links:
            # scrapy.Request rejects URLs without a scheme; urljoin resolves
            # relative hrefs against the page URL and leaves absolute ones as-is.
            yield scrapy.Request(
                response.urljoin(link),
                callback=self.parse2,
                dont_filter=True,  # kept from the original: skip duplicate filtering
            )

    @staticmethod
    def _joined_text(response, query):
        """Return every string matched by the XPath ``query``, concatenated."""
        return ''.join(response.xpath(query).extract())

    def parse2(self, response):
        """Build a ``MenuItem`` from a recipe detail page.

        Every field is the concatenation of all strings matched by its XPath,
        so a field whose XPath matches nothing is stored as an empty string.

        Args:
            response: The downloaded recipe detail page.

        Returns:
            MenuItem: The populated item.
        """
        text = self._joined_text
        menuitem = MenuItem()
        # Recipe name
        menuitem['name'] = text(
            response, '//div[@class="recipe_header_c"]/div[2]/h1/text()')
        # Main ingredients, merged into one string
        menuitem['material1'] = text(
            response, '//div[@class="recipe_ingredientsw"]/div[1]/div[2]/strong/a/text()')
        # Secondary ingredients, merged into one string
        menuitem['material2'] = text(
            response, '//div[@class="recipe_ingredientsw"]/div[2]/div[2]/strong/a/text()')
        # Difficulty level
        menuitem['level'] = text(
            response, '//div[@class="info2"]/div[4]/strong/text()')
        # Required time
        menuitem['needtime'] = text(
            response, '//div[@class="info2"]/div[3]/strong/text()')
        # Recipe image URL
        menuitem['img'] = text(
            response, '//div[@class="recipe_header_c"]/div[1]/img/@src')
        # Cooking steps, merged into one string
        menuitem['step'] = text(
            response, '//div[@class="recipe_step"]/div[@class="step_content"]/p/text()')
        # Energy, sugar and fat content
        menuitem['energy'] = text(
            response, '//div[@class="jztbox"]/div[3]/div[1]/text()')
        menuitem['sugar'] = text(
            response, '//div[@class="jztbox"]/div[2]/div[1]/text()')
        menuitem['fat'] = text(
            response, '//div[@class="jztbox"]/div[4]/div[1]/text()')
        return menuitem
